Data Cleaning

We will first import the dataset counties_fixed.csv

election <- vroom::vroom("data/counties_fixed.csv") %>% 
  clean_names() %>% 
  mutate(winner = case_when(
    percentage20_donald_trump >= percentage20_joe_biden ~ "Trump",
    percentage20_donald_trump <  percentage20_joe_biden ~ "Biden"),
    
    winner16 = case_when(
    percentage16_donald_trump >= percentage16_hillary_clinton ~ "Trump",
    percentage20_donald_trump <  percentage16_hillary_clinton ~ "Clinton")
    )
glimpse(election)
## Rows: 4,867
## Columns: 53
## $ x1                           <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12…
## $ county                       <chr> "Abbeville", "Acadia", "Accomack", "Ada"…
## $ state                        <chr> "SC", "LA", "VA", "ID", "IA", "KY", "MO"…
## $ percentage16_donald_trump    <dbl> 0.629, 0.773, 0.545, 0.479, 0.653, 0.806…
## $ percentage16_hillary_clinton <dbl> 0.346, 0.206, 0.428, 0.387, 0.300, 0.161…
## $ total_votes16                <dbl> 10724, 27386, 15755, 195587, 3759, 8231,…
## $ votes16_donald_trump         <dbl> 6742, 21159, 8582, 93748, 2456, 6637, 60…
## $ votes16_hillary_clinton      <dbl> 3712, 5638, 6737, 75676, 1127, 1323, 349…
## $ percentage20_donald_trump    <dbl> 0.661, 0.795, 0.542, 0.504, 0.697, 0.830…
## $ percentage20_joe_biden       <dbl> 0.330, 0.191, 0.447, 0.465, 0.286, 0.159…
## $ total_votes20                <dbl> 12433, 28425, 16938, 259389, 4183, 8766,…
## $ votes20_donald_trump         <dbl> 8215, 22596, 9172, 130699, 2917, 7275, 6…
## $ votes20_joe_biden            <dbl> 4101, 5443, 7578, 120539, 1197, 1391, 37…
## $ lat                          <dbl> 34.22333, 30.29506, 37.76707, 43.45266, …
## $ long                         <dbl> -82.46171, -92.41420, -75.63235, -116.24…
## $ cases                        <dbl> 805, 3182, 1227, 17451, 222, 517, 578, 8…
## $ deaths                       <dbl> 17, 102, 19, 181, 1, 22, 0, 11, 263, 1, …
## $ total_pop                    <dbl> 24788, 62607, 32840, 435117, 7192, 19304…
## $ men                          <dbl> 12044, 30433, 16079, 217999, 3552, 9632,…
## $ women                        <dbl> 12744, 32174, 16761, 217118, 3640, 9672,…
## $ hispanic                     <dbl> 1.3, 2.4, 8.8, 7.9, 1.7, 1.8, 2.3, 6.4, …
## $ white                        <dbl> 68.9, 77.5, 60.3, 85.2, 96.6, 93.4, 90.5…
## $ black                        <dbl> 27.6, 17.6, 28.3, 1.2, 0.3, 3.6, 2.4, 0.…
## $ native                       <dbl> 0.1, 0.1, 0.3, 0.4, 0.0, 0.1, 0.2, 41.7,…
## $ asian                        <dbl> 0.3, 0.1, 0.7, 2.6, 0.4, 0.1, 2.3, 0.6, …
## $ pacific                      <dbl> 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.2, …
## $ voting_age_citizen           <dbl> 19452, 45197, 24408, 316189, 5572, 15280…
## $ income                       <dbl> 35254, 40492, 42260, 60151, 49477, 36575…
## $ income_err                   <dbl> 2259, 2544, 2253, 1294, 2633, 3426, 2130…
## $ income_per_cap               <dbl> 19234, 21591, 24266, 31642, 28861, 18408…
## $ income_per_cap_err           <dbl> 799, 1002, 1564, 725, 2055, 1010, 1702, …
## $ poverty                      <dbl> 22.7, 21.5, 19.8, 11.8, 9.5, 21.5, 26.2,…
## $ child_poverty                <dbl> 32.1, 27.6, 31.8, 13.1, 12.1, 27.1, 20.7…
## $ professional                 <dbl> 27.2, 27.6, 31.1, 43.0, 28.2, 28.5, 36.8…
## $ service                      <dbl> 20.7, 16.9, 17.7, 16.6, 16.9, 15.9, 18.2…
## $ office                       <dbl> 20.8, 25.7, 18.8, 25.0, 20.0, 19.7, 24.1…
## $ construction                 <dbl> 10.6, 15.0, 15.1, 6.9, 17.3, 12.2, 9.4, …
## $ production                   <dbl> 20.7, 14.8, 17.3, 8.4, 17.6, 23.8, 11.5,…
## $ drive                        <dbl> 78.3, 83.2, 80.0, 80.7, 77.9, 84.5, 77.3…
## $ carpool                      <dbl> 11.1, 10.3, 10.6, 7.7, 12.4, 9.0, 12.1, …
## $ transit                      <dbl> 0.5, 0.2, 0.5, 0.5, 0.3, 0.0, 0.1, 0.1, …
## $ walk                         <dbl> 1.8, 1.6, 2.6, 1.5, 2.8, 2.6, 4.0, 2.8, …
## $ other_transp                 <dbl> 1.8, 2.2, 1.8, 2.8, 0.4, 0.5, 2.6, 1.0, …
## $ work_at_home                 <dbl> 6.5, 2.5, 4.5, 6.9, 6.2, 3.4, 4.0, 3.2, …
## $ mean_commute                 <dbl> 25.8, 27.6, 22.0, 20.4, 22.3, 22.2, 17.1…
## $ employed                     <dbl> 9505, 24982, 13837, 214984, 3680, 7988, …
## $ private_work                 <dbl> 78.8, 80.0, 74.6, 78.3, 73.8, 74.1, 73.6…
## $ public_work                  <dbl> 13.3, 12.1, 18.1, 15.0, 15.3, 15.8, 20.9…
## $ self_employed                <dbl> 7.8, 7.6, 7.1, 6.6, 10.4, 9.9, 5.3, 7.5,…
## $ family_work                  <dbl> 0.1, 0.3, 0.2, 0.1, 0.5, 0.1, 0.2, 0.5, …
## $ unemployment                 <dbl> 9.4, 8.9, 5.4, 4.3, 3.0, 6.2, 5.5, 5.5, …
## $ winner                       <chr> "Trump", "Trump", "Trump", "Trump", "Tru…
## $ winner16                     <chr> "Trump", "Trump", "Trump", "Trump", "Tru…

Now we want to import the dataset electoral_college.csv. Here we have information about how many electoral votes each state has. We will filter by 2020.

# Data set with Electoral votes from all the years

electoral_votes<- read_csv(here("data","electoral_college.csv")) %>% clean_names() %>%
  filter(year==2020)

Let’s look for duplicates

#remove empty rows and columns
temp<-remove_empty(election, which = c("rows","cols"))

#there were no empty rows to be removed
rm(temp)

#check for duplicates
#we look for entries on the same day and for the same country
election%>%get_dupes(county, state)
## # A tibble: 0 x 54
## # … with 54 variables: county <chr>, state <chr>, dupe_count <int>, x1 <dbl>,
## #   percentage16_donald_trump <dbl>, percentage16_hillary_clinton <dbl>,
## #   total_votes16 <dbl>, votes16_donald_trump <dbl>,
## #   votes16_hillary_clinton <dbl>, percentage20_donald_trump <dbl>,
## #   percentage20_joe_biden <dbl>, total_votes20 <dbl>,
## #   votes20_donald_trump <dbl>, votes20_joe_biden <dbl>, lat <dbl>, long <dbl>,
## #   cases <dbl>, deaths <dbl>, total_pop <dbl>, men <dbl>, women <dbl>,
## #   hispanic <dbl>, white <dbl>, black <dbl>, native <dbl>, asian <dbl>,
## #   pacific <dbl>, voting_age_citizen <dbl>, income <dbl>, income_err <dbl>,
## #   income_per_cap <dbl>, income_per_cap_err <dbl>, poverty <dbl>,
## #   child_poverty <dbl>, professional <dbl>, service <dbl>, office <dbl>,
## #   construction <dbl>, production <dbl>, drive <dbl>, carpool <dbl>,
## #   transit <dbl>, walk <dbl>, other_transp <dbl>, work_at_home <dbl>,
## #   mean_commute <dbl>, employed <dbl>, private_work <dbl>, public_work <dbl>,
## #   self_employed <dbl>, family_work <dbl>, unemployment <dbl>, winner <chr>,
## #   winner16 <chr>
electoral_votes %>% get_dupes(state,year)
## # A tibble: 0 x 4
## # … with 4 variables: state <chr>, year <dbl>, dupe_count <int>, votes <dbl>

Get POLYGONS from urbanmaps for counties in the US

counties_sf <- get_urbn_map("counties", sf = TRUE)
counties_sf <- counties_sf %>% 
  mutate(county_name2 = case_when(
    grepl('County$', county_name) ~ str_sub(county_name, end = -8),
    grepl('Parish$', county_name) ~ str_sub(county_name, end = -8),
    TRUE ~ county_name)
  )

Join with elections dataset.

data <- counties_sf %>% 
  left_join(election, by = c( "county_name2" = "county",  "state_abbv" = "state"))
glimpse(data)
## Rows: 3,142
## Columns: 59
## $ county_fips                  <chr> "04015", "12035", "20129", "28093", "295…
## $ state_abbv                   <chr> "AZ", "FL", "KS", "MS", "MO", "NM", "NC"…
## $ state_fips                   <chr> "04", "12", "20", "28", "29", "35", "37"…
## $ county_name                  <chr> "Mohave County", "Flagler County", "Mort…
## $ fips_class                   <chr> "H1", "H1", "H1", "H1", "C7", "H1", "H1"…
## $ state_name                   <chr> "Arizona", "Florida", "Kansas", "Mississ…
## $ county_name2                 <chr> "Mohave", "Flagler", "Morton", "Marshall…
## $ x1                           <dbl> 1929, 921, 2001, 1799, 2639, 1844, 2937,…
## $ percentage16_donald_trump    <dbl> 0.737, 0.589, 0.836, 0.444, 0.159, 0.233…
## $ percentage16_hillary_clinton <dbl> 0.222, 0.383, 0.125, 0.540, 0.797, 0.628…
## $ total_votes16                <dbl> 74189, 57413, 1160, 14698, 127403, 20959…
## $ votes16_donald_trump         <dbl> 54656, 33804, 970, 6525, 20281, 4893, 25…
## $ votes16_hillary_clinton      <dbl> 16485, 21985, 145, 7944, 101487, 13162, …
## $ percentage20_donald_trump    <dbl> 0.750, 0.599, 0.863, 0.505, 0.161, 0.295…
## $ percentage20_joe_biden       <dbl> 0.237, 0.392, 0.126, 0.481, 0.823, 0.680…
## $ total_votes20                <dbl> 104667, 71846, 1163, 13064, 131765, 2640…
## $ votes20_donald_trump         <dbl> 78534, 43039, 1004, 6591, 21185, 7782, 2…
## $ votes20_joe_biden            <dbl> 24831, 28148, 147, 6283, 108385, 17969, …
## $ lat                          <dbl> 35.70472, 29.45934, 37.19141, 34.76216, …
## $ long                         <dbl> -113.75779, -81.31509, -101.79925, -89.5…
## $ cases                        <dbl> 4453, 2181, 56, 1548, NA, 4752, 264, 86,…
## $ deaths                       <dbl> 234, 39, 2, 32, NA, 262, 8, 1, NA, 0, 30…
## $ total_pop                    <dbl> 204691, 105015, 2931, 35981, 314867, 728…
## $ men                          <dbl> 103175, 50436, 1534, 17793, 152224, 3520…
## $ women                        <dbl> 101516, 54579, 1397, 18188, 162643, 3764…
## $ hispanic                     <dbl> 15.9, 10.0, 22.6, 3.5, 3.9, 14.1, 5.0, 1…
## $ white                        <dbl> 78.0, 74.7, 72.4, 47.7, 42.9, 9.1, 44.9,…
## $ black                        <dbl> 1.0, 10.0, 0.8, 47.6, 47.5, 0.5, 48.2, 0…
## $ native                       <dbl> 2.1, 0.3, 0.0, 0.1, 0.2, 73.3, 0.1, 2.8,…
## $ asian                        <dbl> 1.2, 2.5, 0.1, 0.1, 3.1, 0.9, 0.2, 0.0, …
## $ pacific                      <dbl> 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ voting_age_citizen           <dbl> 160544, 83353, 1978, 27763, 241414, 5054…
## $ income                       <dbl> 41567, 51049, 43813, 41134, 38664, 30336…
## $ income_err                   <dbl> 796, 1256, 9676, 2932, 799, 1354, 3147, …
## $ income_per_cap               <dbl> 23527, 25741, 23038, 19775, 26739, 14077…
## $ income_per_cap_err           <dbl> 630, 654, 1831, 1047, 519, 634, 2590, 33…
## $ poverty                      <dbl> 18.6, 13.3, 8.0, 17.8, 25.0, 37.5, 24.1,…
## $ child_poverty                <dbl> 27.2, 21.5, 7.9, 26.6, 39.8, 45.7, 46.6,…
## $ professional                 <dbl> 24.3, 31.6, 29.5, 23.2, 40.6, 29.4, 15.8…
## $ service                      <dbl> 25.4, 19.3, 15.5, 14.3, 22.5, 25.1, 23.0…
## $ office                       <dbl> 26.8, 31.9, 17.3, 24.9, 21.7, 22.0, 23.6…
## $ construction                 <dbl> 10.9, 9.4, 32.5, 12.6, 5.0, 9.7, 12.3, 2…
## $ production                   <dbl> 12.7, 7.8, 5.3, 25.1, 10.2, 13.8, 25.3, …
## $ drive                        <dbl> 79.9, 83.5, 71.3, 86.1, 71.8, 74.8, 81.2…
## $ carpool                      <dbl> 12.6, 6.6, 13.6, 5.4, 8.2, 9.7, 13.1, 10…
## $ transit                      <dbl> 0.9, 0.3, 0.0, 0.4, 9.5, 0.6, 0.0, 0.0, …
## $ walk                         <dbl> 1.5, 0.3, 3.9, 1.4, 4.4, 3.7, 1.6, 8.0, …
## $ other_transp                 <dbl> 2.1, 1.7, 2.0, 0.8, 1.9, 1.6, 2.3, 0.6, …
## $ work_at_home                 <dbl> 3.1, 7.7, 9.3, 6.0, 4.2, 9.5, 1.8, 7.8, …
## $ mean_commute                 <dbl> 20.4, 25.7, 17.7, 28.2, 24.1, 22.1, 25.1…
## $ employed                     <dbl> 69774, 39278, 1275, 14304, 152079, 23207…
## $ private_work                 <dbl> 78.6, 81.5, 69.6, 80.3, 83.9, 56.9, 78.4…
## $ public_work                  <dbl> 14.1, 11.8, 20.5, 11.9, 11.8, 35.7, 15.0…
## $ self_employed                <dbl> 7.1, 6.6, 9.8, 7.7, 4.0, 7.4, 6.6, 11.0,…
## $ family_work                  <dbl> 0.2, 0.1, 0.0, 0.1, 0.2, 0.0, 0.0, 0.4, …
## $ unemployment                 <dbl> 10.1, 6.5, 8.7, 6.2, 9.4, 16.1, 10.4, 2.…
## $ winner                       <chr> "Trump", "Trump", "Trump", "Trump", "Bid…
## $ winner16                     <chr> "Trump", "Trump", "Trump", "Clinton", "C…
## $ geometry                     <MULTIPOLYGON [m]> MULTIPOLYGON (((-1321573 -8…

View datasets summary.

# Summary
summary(election)
##        x1          county             state           percentage16_donald_trump
##  Min.   :   0   Length:4867        Length:4867        Min.   :0.0410           
##  1st Qu.:1216   Class :character   Class :character   1st Qu.:0.5500           
##  Median :2433   Mode  :character   Mode  :character   Median :0.6670           
##  Mean   :2435                                         Mean   :0.6362           
##  3rd Qu.:3650                                         3rd Qu.:0.7505           
##  Max.   :4954                                         Max.   :0.9530           
##                                                       NA's   :1756             
##  percentage16_hillary_clinton total_votes16     votes16_donald_trump
##  Min.   :0.0310               Min.   :     64   Min.   :    57      
##  1st Qu.:0.2045               1st Qu.:   4824   1st Qu.:  3207      
##  Median :0.2850               Median :  10935   Median :  7117      
##  Mean   :0.3168               Mean   :  40916   Mean   : 19350      
##  3rd Qu.:0.3990               3rd Qu.:  28675   3rd Qu.: 17396      
##  Max.   :0.9280               Max.   :2314275   Max.   :590465      
##  NA's   :1756                 NA's   :1756      NA's   :1756        
##  votes16_hillary_clinton percentage20_donald_trump percentage20_joe_biden
##  Min.   :      4         Min.   :0.0000            Min.   :0.0310        
##  1st Qu.:   1164         1st Qu.:0.4540            1st Qu.:0.2470        
##  Median :   3140         Median :0.6020            Median :0.3760        
##  Mean   :  19566         Mean   :0.5847            Mean   :0.3951        
##  3rd Qu.:   9536         3rd Qu.:0.7340            3rd Qu.:0.5240        
##  Max.   :1654626         Max.   :0.9620            Max.   :1.0000        
##  NA's   :1756            NA's   :318               NA's   :318           
##  total_votes20     votes20_donald_trump votes20_joe_biden      lat       
##  Min.   :      0   Min.   :      0      Min.   :      0   Min.   : 0.00  
##  1st Qu.:   2322   1st Qu.:   1233      1st Qu.:    740   1st Qu.:34.27  
##  Median :   7481   Median :   4348      Median :   2417   Median :38.17  
##  Mean   :  33162   Mean   :  15691      Mean   :  16876   Mean   :37.25  
##  3rd Qu.:  20072   3rd Qu.:  12278      3rd Qu.:   7366   3rd Qu.:41.68  
##  Max.   :4139895   Max.   :1107090      Max.   :2947568   Max.   :69.31  
##  NA's   :230       NA's   :230          NA's   :230       NA's   :1615   
##       long             cases              deaths          total_pop       
##  Min.   :-174.16   Min.   :     0.0   Min.   :   0.00   Min.   :      74  
##  1st Qu.: -98.07   1st Qu.:   216.8   1st Qu.:   2.00   1st Qu.:   10945  
##  Median : -89.92   Median :   614.0   Median :  10.00   Median :   25692  
##  Mean   : -89.42   Mean   :  2808.8   Mean   :  70.75   Mean   :  102166  
##  3rd Qu.: -82.84   3rd Qu.:  1758.0   3rd Qu.:  35.00   3rd Qu.:   67445  
##  Max.   :   0.00   Max.   :309190.0   Max.   :7404.00   Max.   :10105722  
##  NA's   :1615      NA's   :1615       NA's   :1615      NA's   :1725      
##       men              women            hispanic          white       
##  Min.   :     39   Min.   :     35   Min.   : 0.000   Min.   :  0.60  
##  1st Qu.:   5514   1st Qu.:   5460   1st Qu.: 2.100   1st Qu.: 65.10  
##  Median :  12798   Median :  12885   Median : 4.000   Median : 84.20  
##  Mean   :  50292   Mean   :  51873   Mean   : 9.121   Mean   : 76.76  
##  3rd Qu.:  33481   3rd Qu.:  34108   3rd Qu.: 9.300   3rd Qu.: 92.90  
##  Max.   :4979641   Max.   :5126081   Max.   :99.200   Max.   :100.00  
##  NA's   :1725      NA's   :1725      NA's   :1725     NA's   :1725    
##      black            native           asian          pacific       
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.00   Min.   : 0.0000  
##  1st Qu.: 0.600   1st Qu.: 0.100   1st Qu.: 0.30   1st Qu.: 0.0000  
##  Median : 2.100   Median : 0.300   Median : 0.60   Median : 0.0000  
##  Mean   : 8.896   Mean   : 1.812   Mean   : 1.32   Mean   : 0.0855  
##  3rd Qu.: 9.875   3rd Qu.: 0.600   3rd Qu.: 1.20   3rd Qu.: 0.1000  
##  Max.   :86.900   Max.   :90.300   Max.   :41.80   Max.   :33.7000  
##  NA's   :1725     NA's   :1725     NA's   :1725    NA's   :1725     
##  voting_age_citizen     income         income_err    income_per_cap 
##  Min.   :     59    Min.   : 19264   Min.   :  262   Min.   : 9334  
##  1st Qu.:   8279    1st Qu.: 41123   1st Qu.: 1762   1st Qu.:21810  
##  Median :  19480    Median : 48066   Median : 2619   Median :25272  
##  Mean   :  72223    Mean   : 49754   Mean   : 3176   Mean   :26040  
##  3rd Qu.:  51224    3rd Qu.: 55764   3rd Qu.: 3834   3rd Qu.:29126  
##  Max.   :6218279    Max.   :129588   Max.   :41001   Max.   :69529  
##  NA's   :1725       NA's   :1725     NA's   :1725    NA's   :1725   
##  income_per_cap_err    poverty      child_poverty    professional  
##  Min.   :  129      Min.   : 2.40   Min.   : 0.00   Min.   :11.40  
##  1st Qu.:  849      1st Qu.:11.30   1st Qu.:14.80   1st Qu.:27.30  
##  Median : 1240      Median :15.20   Median :21.20   Median :30.50  
##  Mean   : 1532      Mean   :15.99   Mean   :22.11   Mean   :31.54  
##  3rd Qu.: 1826      3rd Qu.:19.40   3rd Qu.:27.80   3rd Qu.:34.90  
##  Max.   :16145      Max.   :52.00   Max.   :76.50   Max.   :69.00  
##  NA's   :1725       NA's   :1725    NA's   :1726    NA's   :1725   
##     service          office       construction     production   
##  Min.   : 0.00   Min.   : 4.80   Min.   : 0.00   Min.   : 0.00  
##  1st Qu.:15.70   1st Qu.:19.90   1st Qu.: 9.80   1st Qu.:11.60  
##  Median :17.80   Median :22.00   Median :12.20   Median :15.50  
##  Mean   :18.12   Mean   :21.78   Mean   :12.64   Mean   :15.93  
##  3rd Qu.:20.07   3rd Qu.:23.80   3rd Qu.:14.90   3rd Qu.:19.60  
##  Max.   :46.40   Max.   :37.20   Max.   :36.40   Max.   :48.70  
##  NA's   :1725    NA's   :1725    NA's   :1725    NA's   :1725   
##      drive          carpool          transit             walk       
##  Min.   : 4.60   Min.   : 0.000   Min.   : 0.0000   Min.   : 0.000  
##  1st Qu.:77.20   1st Qu.: 8.100   1st Qu.: 0.1000   1st Qu.: 1.400  
##  Median :81.00   Median : 9.500   Median : 0.3000   Median : 2.300  
##  Mean   :79.52   Mean   : 9.899   Mean   : 0.9368   Mean   : 3.236  
##  3rd Qu.:84.00   3rd Qu.:11.300   3rd Qu.: 0.8000   3rd Qu.: 3.800  
##  Max.   :97.20   Max.   :29.300   Max.   :61.8000   Max.   :59.200  
##  NA's   :1725    NA's   :1725     NA's   :1725      NA's   :1725    
##   other_transp     work_at_home     mean_commute      employed      
##  Min.   : 0.000   Min.   : 0.000   Min.   : 5.10   Min.   :     39  
##  1st Qu.: 0.900   1st Qu.: 2.900   1st Qu.:19.60   1st Qu.:   4550  
##  Median : 1.300   Median : 4.100   Median :23.10   Median :  10695  
##  Mean   : 1.603   Mean   : 4.803   Mean   :23.35   Mean   :  47931  
##  3rd Qu.: 1.900   3rd Qu.: 5.800   3rd Qu.:26.90   3rd Qu.:  29515  
##  Max.   :43.200   Max.   :33.000   Max.   :45.10   Max.   :4805817  
##  NA's   :1725     NA's   :1725     NA's   :1725    NA's   :1725     
##   private_work    public_work    self_employed     family_work    
##  Min.   :31.10   Min.   : 4.40   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.:71.70   1st Qu.:12.70   1st Qu.: 5.200   1st Qu.:0.1000  
##  Median :76.30   Median :15.70   Median : 6.800   Median :0.2000  
##  Mean   :75.07   Mean   :16.89   Mean   : 7.758   Mean   :0.2824  
##  3rd Qu.:80.30   3rd Qu.:19.50   3rd Qu.: 9.175   3rd Qu.:0.3000  
##  Max.   :88.80   Max.   :64.80   Max.   :38.000   Max.   :8.0000  
##  NA's   :1725    NA's   :1725    NA's   :1725     NA's   :1725    
##   unemployment       winner            winner16        
##  Min.   : 0.000   Length:4867        Length:4867       
##  1st Qu.: 4.400   Class :character   Class :character  
##  Median : 6.100   Mode  :character   Mode  :character  
##  Mean   : 6.364                                        
##  3rd Qu.: 7.800                                        
##  Max.   :28.800                                        
##  NA's   :1725
summary(data)
##  county_fips         state_abbv         state_fips        county_name       
##  Length:3142        Length:3142        Length:3142        Length:3142       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   fips_class         state_name        county_name2             x1      
##  Length:3142        Length:3142        Length:3142        Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.: 785  
##  Mode  :character   Mode  :character   Mode  :character   Median :1570  
##                                                           Mean   :1587  
##                                                           3rd Qu.:2355  
##                                                           Max.   :4953  
##                                                           NA's   :1     
##  percentage16_donald_trump percentage16_hillary_clinton total_votes16    
##  Min.   :0.0410            Min.   :0.0310               Min.   :     64  
##  1st Qu.:0.5500            1st Qu.:0.2045               1st Qu.:   4824  
##  Median :0.6670            Median :0.2850               Median :  10935  
##  Mean   :0.6362            Mean   :0.3168               Mean   :  40916  
##  3rd Qu.:0.7505            3rd Qu.:0.3990               3rd Qu.:  28675  
##  Max.   :0.9530            Max.   :0.9280               Max.   :2314275  
##  NA's   :31                NA's   :31                   NA's   :31       
##  votes16_donald_trump votes16_hillary_clinton percentage20_donald_trump
##  Min.   :    57       Min.   :      4         Min.   :0.0400           
##  1st Qu.:  3207       1st Qu.:   1164         1st Qu.:0.5620           
##  Median :  7117       Median :   3140         Median :0.6840           
##  Mean   : 19350       Mean   :  19566         Mean   :0.6516           
##  3rd Qu.: 17396       3rd Qu.:   9536         3rd Qu.:0.7750           
##  Max.   :590465       Max.   :1654626         Max.   :0.9620           
##  NA's   :31           NA's   :31              NA's   :27               
##  percentage20_joe_biden total_votes20     votes20_donald_trump
##  Min.   :0.0310         Min.   :      0   Min.   :      0     
##  1st Qu.:0.2090         1st Qu.:   5121   1st Qu.:   3499     
##  Median :0.2990         Median :  11899   Median :   7952     
##  Mean   :0.3309         Mean   :  46856   Mean   :  22480     
##  3rd Qu.:0.4190         3rd Qu.:  31072   3rd Qu.:  19511     
##  Max.   :0.9400         Max.   :4139895   Max.   :1107090     
##  NA's   :27             NA's   :52        NA's   :52          
##  votes20_joe_biden      lat             long             cases       
##  Min.   :      0   Min.   :19.60   Min.   :-159.60   Min.   :     0  
##  1st Qu.:   1242   1st Qu.:34.61   1st Qu.: -98.18   1st Qu.:   240  
##  Median :   3480   Median :38.36   Median : -90.42   Median :   654  
##  Mean   :  23545   Mean   :38.26   Mean   : -91.94   Mean   :  2909  
##  3rd Qu.:  10814   3rd Qu.:41.73   3rd Qu.: -83.62   3rd Qu.:  1795  
##  Max.   :2947568   Max.   :48.82   Max.   : -67.63   Max.   :309190  
##  NA's   :52        NA's   :70      NA's   :70        NA's   :70      
##      deaths          total_pop             men              women        
##  Min.   :   0.00   Min.   :      74   Min.   :     39   Min.   :     35  
##  1st Qu.:   3.00   1st Qu.:   10948   1st Qu.:   5516   1st Qu.:   5462  
##  Median :  11.00   Median :   25694   Median :  12804   Median :  12887  
##  Mean   :  73.77   Mean   :  102189   Mean   :  50304   Mean   :  51885  
##  3rd Qu.:  36.00   3rd Qu.:   67413   3rd Qu.:  33480   3rd Qu.:  34102  
##  Max.   :7404.00   Max.   :10105722   Max.   :4979641   Max.   :5126081  
##  NA's   :70        NA's   :3          NA's   :3         NA's   :3        
##     hispanic          white            black           native      
##  Min.   : 0.000   Min.   :  0.60   Min.   : 0.00   Min.   : 0.000  
##  1st Qu.: 2.050   1st Qu.: 65.10   1st Qu.: 0.60   1st Qu.: 0.100  
##  Median : 4.000   Median : 84.20   Median : 2.10   Median : 0.300  
##  Mean   : 9.104   Mean   : 76.78   Mean   : 8.90   Mean   : 1.811  
##  3rd Qu.: 9.300   3rd Qu.: 92.95   3rd Qu.: 9.85   3rd Qu.: 0.600  
##  Max.   :99.200   Max.   :100.00   Max.   :86.90   Max.   :90.300  
##  NA's   :3        NA's   :3        NA's   :3       NA's   :3       
##      asian           pacific         voting_age_citizen     income      
##  Min.   : 0.000   Min.   : 0.00000   Min.   :     59    Min.   : 19264  
##  1st Qu.: 0.300   1st Qu.: 0.00000   1st Qu.:   8280    1st Qu.: 41126  
##  Median : 0.600   Median : 0.00000   Median :  19506    Median : 48072  
##  Mean   : 1.319   Mean   : 0.08515   Mean   :  72244    Mean   : 49758  
##  3rd Qu.: 1.200   3rd Qu.: 0.10000   3rd Qu.:  51210    3rd Qu.: 55763  
##  Max.   :41.800   Max.   :33.70000   Max.   :6218279    Max.   :129588  
##  NA's   :3        NA's   :3          NA's   :3          NA's   :3       
##    income_err    income_per_cap  income_per_cap_err    poverty     
##  Min.   :  262   Min.   : 9334   Min.   :  129      Min.   : 2.40  
##  1st Qu.: 1762   1st Qu.:21824   1st Qu.:  849      1st Qu.:11.30  
##  Median : 2619   Median :25273   Median : 1239      Median :15.20  
##  Mean   : 3177   Mean   :26041   Mean   : 1532      Mean   :15.98  
##  3rd Qu.: 3836   3rd Qu.:29124   3rd Qu.: 1826      3rd Qu.:19.40  
##  Max.   :41001   Max.   :69529   Max.   :16145      Max.   :52.00  
##  NA's   :3       NA's   :3       NA's   :3          NA's   :3      
##  child_poverty   professional      service          office       construction  
##  Min.   : 0.0   Min.   :11.40   Min.   : 0.00   Min.   : 4.80   Min.   : 0.00  
##  1st Qu.:14.8   1st Qu.:27.30   1st Qu.:15.70   1st Qu.:19.90   1st Qu.: 9.80  
##  Median :21.2   Median :30.50   Median :17.80   Median :22.00   Median :12.20  
##  Mean   :22.1   Mean   :31.54   Mean   :18.12   Mean   :21.78   Mean   :12.63  
##  3rd Qu.:27.8   3rd Qu.:34.90   3rd Qu.:20.05   3rd Qu.:23.80   3rd Qu.:14.90  
##  Max.   :76.5   Max.   :69.00   Max.   :46.40   Max.   :37.20   Max.   :36.40  
##  NA's   :4      NA's   :3       NA's   :3       NA's   :3       NA's   :3      
##    production        drive          carpool          transit       
##  Min.   : 0.00   Min.   : 4.60   Min.   : 0.000   Min.   : 0.0000  
##  1st Qu.:11.60   1st Qu.:77.20   1st Qu.: 8.100   1st Qu.: 0.1000  
##  Median :15.50   Median :81.00   Median : 9.500   Median : 0.3000  
##  Mean   :15.93   Mean   :79.53   Mean   : 9.895   Mean   : 0.9369  
##  3rd Qu.:19.60   3rd Qu.:84.00   3rd Qu.:11.300   3rd Qu.: 0.8000  
##  Max.   :48.70   Max.   :97.20   Max.   :29.300   Max.   :61.8000  
##  NA's   :3       NA's   :3       NA's   :3        NA's   :3        
##       walk         other_transp     work_at_home     mean_commute  
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 5.10  
##  1st Qu.: 1.400   1st Qu.: 0.900   1st Qu.: 2.900   1st Qu.:19.60  
##  Median : 2.300   Median : 1.300   Median : 4.100   Median :23.10  
##  Mean   : 3.231   Mean   : 1.602   Mean   : 4.804   Mean   :23.35  
##  3rd Qu.: 3.800   3rd Qu.: 1.900   3rd Qu.: 5.800   3rd Qu.:26.90  
##  Max.   :59.200   Max.   :43.200   Max.   :33.000   Max.   :45.10  
##  NA's   :3        NA's   :3        NA's   :3        NA's   :3      
##     employed        private_work    public_work    self_employed   
##  Min.   :     39   Min.   :31.10   Min.   : 4.40   Min.   : 0.000  
##  1st Qu.:   4551   1st Qu.:71.70   1st Qu.:12.70   1st Qu.: 5.200  
##  Median :  10697   Median :76.30   Median :15.70   Median : 6.800  
##  Mean   :  47946   Mean   :75.07   Mean   :16.89   Mean   : 7.758  
##  3rd Qu.:  29488   3rd Qu.:80.30   3rd Qu.:19.50   3rd Qu.: 9.150  
##  Max.   :4805817   Max.   :88.80   Max.   :64.80   Max.   :38.000  
##  NA's   :3         NA's   :3       NA's   :3       NA's   :3       
##   family_work      unemployment       winner            winner16        
##  Min.   :0.0000   Min.   : 0.000   Length:3142        Length:3142       
##  1st Qu.:0.1000   1st Qu.: 4.400   Class :character   Class :character  
##  Median :0.2000   Median : 6.100   Mode  :character   Mode  :character  
##  Mean   :0.2826   Mean   : 6.364                                        
##  3rd Qu.:0.3000   3rd Qu.: 7.800                                        
##  Max.   :8.0000   Max.   :28.800                                        
##  NA's   :3        NA's   :3                                             
##           geometry   
##  MULTIPOLYGON :3142  
##  epsg:2163    :   0  
##  +proj=laea...:   0  
##                      
##                      
##                      
## 

Now we will create a new dataset with the STATE POLYGONS from urbanmap and the data from electoral_votes. We will add new data such as:

We will use this data to produce maps and analyze the underlying trends on them. The data included was taken from this article

# Identified swing states
swing_states <- c("Arizona","Texas","Florida","Georgia","Pennsylvania","Ohio","Wisconsin","North Carolina","South Carolina","Iowa","Nevada","Michigan")
# Swing states Won by Trump
trump_won_swing<- c("Texas","Florida","Ohio","North Carolina","South Carolina","Iowa")
# Swing states won by Biden
biden_won_swing<- c("Arizona","Georgia","Pennsylvania","Wisconsin","Nevada","Michigan")
# Swing states won by Trump in 2016
trump_won_swing16<-c("Arizona","Texas","Florida","Georgia","Pennsylvania","Ohio","Wisconsin","North Carolina","South Carolina","Iowa","Michigan")
# Swing states won by Hillary in 2016
hillary_won_swing16<-c("Nevada")
# Swing states flipped by Biden in 2020
flipped<-c("Arizona","Georgia","Pennsylvania","Wisconsin","Michigan")



electoral_mapping<- 
  get_urbn_map("states", sf=TRUE) %>%
  left_join(electoral_votes %>% select(!year), 
            by =c("state_name" = "state")
            ) %>%
  
  # Create variable winner_by
  mutate(winner_by = ifelse(state_name %in% trump_won_swing, 
                            "Trump", 
                            ifelse(state_name %in% biden_won_swing, 
                                   "Biden", NA)),
         # Create variable winner_by 16
         winner_by16 = ifelse(state_name %in% trump_won_swing16, "Trump", 
                              ifelse(state_name %in% hillary_won_swing16, "Hillary", NA)),
         # Create variable was flipped
         was_flipped = ifelse(state_name %in% flipped, TRUE, FALSE),
         # Create variable colour
         colour = ifelse(state_name %in% trump_won_swing, "red",
                         ifelse(state_name %in% hillary_won_swing16, "blue",
                                ifelse(state_name %in% flipped, "flipped",NA))))

Visualizations

General Overview

Battle Trump against Biden

party_colours <- c("Biden" = "#2E74C0", "Trump"= "#CB454A")

election %>%
  select(votes20_donald_trump,votes20_joe_biden) %>% 
  drop_na() %>% 
  summarise(Biden = sum(votes20_joe_biden),
            Trump = sum(votes20_donald_trump)) %>% 

  ggplot() + 
  geom_col(aes(x = 1, y = 538, fill = "Trump"), width = 1) +
  geom_col(aes(x = 1, y = 306, fill = "Biden"), width = 1) +
  scale_fill_manual(values = party_colours) + 
  
  labs(title = "Biden triumphs by 6M votes",
       subtitle = "2020 election vote counts",
       fill = NULL) + 
  theme_void() +
  theme(legend.position = "top",
         plot.title = element_text(family = "Courier", face = "bold", size = 20),
         plot.subtitle = element_text(family = "Courier", size = 12),
         legend.text=element_text(family="Courier"),
         legend.title=element_text(family="Courier"))+
  xlim(c(0,2)) + 
  coord_flip()  +
  geom_linerangeh(aes(y = 270, xmin = 0.5, xmax = 1.75),
                 size = 0.7,
                 color = "black") +
  annotate("text", x=1.82, y=270, label="270 electors", size = 4,color = "black",family = "Courier") +
  # annotate("text", x=1.6, y=70, label="306 electors", size = 8,color = "#2E74C0") +
  # annotate("text", x=1.6, y=470, label="232 electors", size = 8,color = "#CB454A") +
  annotate("text", x=1, y=35, label="306", size = 10,color = "white", fontface = "bold", family = "Courier") +
  annotate("text", x=1, y=505, label="232", size = 10,color = "white", fontface = "bold",family = "Courier") +
  annotate("text", x=0.4, y=70, label="78M votes (52%)", size = 5,color = "#2E74C0",family = "Courier") +
  annotate("text", x=0.4, y=465, label="72M votes (48%)", size = 5,color = "#CB454A",family = "Courier") +
  
  theme(plot.title = element_text(face = "bold"))

Representation of counties

party_colours <- c("Biden" = "#2E74C0", "Trump"= "#CB454A")

p1 <- election %>%
  select(votes20_donald_trump,votes20_joe_biden) %>% 
  drop_na() %>% 
  summarise(Biden = sum(votes20_joe_biden),
            Trump = sum(votes20_donald_trump)) %>% 

  ggplot() + 
  geom_col(aes(x = 0.76, y = Biden + Trump, fill = "Trump"), width = 1.5) +
  geom_col(aes(x = 0.76, y = Biden, fill = "Biden"), width = 1.5) +
  scale_fill_manual(values = party_colours) + 
  
  labs(title = "Counties do not display people",
       subtitle = "2020 election vote counts",
       fill = NULL) + 
  theme_void() +
  theme(plot.title = element_text(family = "Courier", face = "bold", size = 20),
         plot.subtitle = element_text(family = "Courier", size = 12),
        legend.position = "top",
        text = element_text(family = "Courier"))+
  xlim(c(0,2)) + 
  coord_flip()  +
  # geom_linerangeh(aes(y = 0.5*(Biden+Trump), xmin = 0.5, xmax = 1.75),
  #                size = 0.5,
  #                color = "black") +
  
  annotate("text", x=0.76, y=12000000, label="78M", size = 10,color = "white", fontface = "bold",family = "Courier") +
  annotate("text", x=0.76, y=137500000, label="72M", size = 10,color = "white", fontface = "bold",family = "Courier") +
  annotate("text", x=1.8, y=18000000, label="By vote count:", size = 4.5,color = "black", fontface = "bold",family = "Courier") +
  
  theme(plot.title = element_text(face = "bold"))
party_colours <- c("Biden" = "#2E74C0", "Trump"= "#CB454A")

summarised_el <- election %>%
  select(votes20_donald_trump,votes20_joe_biden) %>% 
  drop_na() %>% 
  summarise(Biden = sum(votes20_joe_biden),
            Trump = sum(votes20_donald_trump))
p2 <- data %>%
  # select(votes20_donald_trump,votes20_joe_biden) %>% 
  # drop_na() %>% 
  # pivot_longer(cols = Biden:Trump, names_to = ("candidate")) %>% 
  ggplot() + 
  geom_bar(aes(x = 0.76, fill = winner), position = position_stack(reverse = T), width = 1.5) + 
  # geom_col(`dataaes(x = 1, y = Biden + Trump, fill = "Trump"), width = 1)
  # geom_bar(aes(x = 1, y = Biden + Trump, fill = "Trump")) +
  # geom_col(aes(x = 1, y = Biden, fill = "Biden")) +
  scale_fill_manual(values = party_colours, na.translate = F) + 
  # labs(title = "Trump gets majory of counties dispite loss",
  #      subtitle = "2020 election vote counts by counties",
  #      fill = NULL) + 
  theme_void() +
  theme(plot.title = element_text(face = 'bold'),
        legend.position = "top",
        text = element_text(family = "Courier"))+
  xlim(c(0,2)) + ylim(c(0,3115)) +
  coord_flip() +
  annotate("text", x=0.76, y=210, label="515", size = 10,color = "white", fontface = "bold",family = "Courier") +
  annotate("text", x=0.76, y=2800, label="2600", size = 10,color = "white", fontface = "bold",family = "Courier") +
  annotate("text", x=1.8, y=320, label="By counties:", size = 4.5,color = "black", fontface = "bold",family = "Courier") +
  guides(fill = FALSE)

library(patchwork)
p1/p2

Votes in 2016

# electoral votes battle
party_colours <- c("Clinton" = "#2E74C0", "Trump"= "#CB454A")

election %>%
  select(votes20_donald_trump,votes20_joe_biden) %>% 
  drop_na() %>% 
  summarise(Biden = sum(votes20_joe_biden),
            Trump = sum(votes20_donald_trump)) %>% 

  ggplot() + 
  geom_col(aes(x = 1, y = 538, fill = "Trump"), width = 1) +
  geom_col(aes(x = 1, y = 232, fill = "Clinton"), width = 1) +
  scale_fill_manual(values = party_colours) + 
  
  labs(title = "Trump won by electoral vote difference",
       subtitle = "2016 Election Results",
       fill = NULL) + 
  theme_void() +
  theme(legend.position = "top",
         plot.title = element_text(family = "Courier", face = "bold", size = 20),
         plot.subtitle = element_text(family = "Courier", size = 12),
         legend.text=element_text(family="Courier"),
         legend.title=element_text(family="Courier"))+
  xlim(c(0,2)) + 
  coord_flip()  +
  geom_linerangeh(aes(y = 270, xmin = 0.5, xmax = 1.75),
                 size = 0.7,
                 color = "black") +
  annotate("text", x=1.82, y=270, label="270 electors", size = 4,color = "black",family = "Courier") +
  # annotate("text", x=1.6, y=70, label="306 electors", size = 8,color = "#2E74C0") +
  # annotate("text", x=1.6, y=470, label="232 electors", size = 8,color = "#CB454A") +
  annotate("text", x=1, y=35, label="232", size = 10,color = "white", fontface = "bold", family = "Courier") +
  annotate("text", x=1, y=505, label="306", size = 10,color = "white", fontface = "bold",family = "Courier") +
  annotate("text", x=0.4, y=70, label="66M votes (48%)", size = 5,color = "#2E74C0",family = "Courier") +
  annotate("text", x=0.4, y=465, label="63M votes (46%)", size = 5,color = "#CB454A",family = "Courier") +
  
  theme(plot.title = element_text(face = "bold"))

Votes by Counties

data %>% 
  ggplot(aes()) +
  geom_sf(aes(fill = winner, colour = winner)) +
  scale_fill_manual(values = c("#2E74C0","#CB454A"), na.translate = F) +
  scale_colour_manual(values = c("#2E74C0","#CB454A"), na.translate = F) +
  coord_sf(datum = NA) +
  theme_void() +
  theme(plot.title = element_text(face = "bold"),
        legend.position = "top",
        text = element_text(family = "Courier")) + 
  labs(title = "US turns red despite Trump loss",
       subtitle = "US county map colored by winner",
       fill = NULL) +
  guides(color= FALSE)

Now we will create circles to visualize this by population

circles_data <- data
st_geometry(circles_data) <- NULL

circles_data_sf <- circles_data %>%
  drop_na(long, lat) %>%
  filter(state_abbv != "HI") %>% 
  mutate(long = long,
         lat = lat) %>% 
  st_as_sf(coords = c('long', 'lat'),
           crs = 4326)

glimpse(circles_data_sf)
## Rows: 3,067
## Columns: 57
## $ county_fips                  <chr> "04015", "12035", "20129", "28093", "350…
## $ state_abbv                   <chr> "AZ", "FL", "KS", "MS", "NM", "NC", "ND"…
## $ state_fips                   <chr> "04", "12", "20", "28", "35", "37", "38"…
## $ county_name                  <chr> "Mohave County", "Flagler County", "Mort…
## $ fips_class                   <chr> "H1", "H1", "H1", "H1", "H1", "H1", "H1"…
## $ state_name                   <chr> "Arizona", "Florida", "Kansas", "Mississ…
## $ county_name2                 <chr> "Mohave", "Flagler", "Morton", "Marshall…
## $ x1                           <dbl> 1929, 921, 2001, 1799, 1844, 2937, 310, …
## $ percentage16_donald_trump    <dbl> 0.737, 0.589, 0.836, 0.444, 0.233, 0.419…
## $ percentage16_hillary_clinton <dbl> 0.222, 0.383, 0.125, 0.540, 0.628, 0.571…
## $ total_votes16                <dbl> 74189, 57413, 1160, 14698, 20959, 6115, …
## $ votes16_donald_trump         <dbl> 54656, 33804, 970, 6525, 4893, 2560, 879…
## $ votes16_hillary_clinton      <dbl> 16485, 21985, 145, 7944, 13162, 3490, 11…
## $ percentage20_donald_trump    <dbl> 0.750, 0.599, 0.863, 0.505, 0.295, 0.449…
## $ percentage20_joe_biden       <dbl> 0.237, 0.392, 0.126, 0.481, 0.680, 0.547…
## $ total_votes20                <dbl> 104667, 71846, 1163, 13064, 26406, 6191,…
## $ votes20_donald_trump         <dbl> 78534, 43039, 1004, 6591, 7782, 2778, 98…
## $ votes20_joe_biden            <dbl> 24831, 28148, 147, 6283, 17969, 3387, 13…
## $ cases                        <dbl> 4453, 2181, 56, 1548, 4752, 264, 86, 68,…
## $ deaths                       <dbl> 234, 39, 2, 32, 262, 8, 1, 0, 30, 52, 6,…
## $ total_pop                    <dbl> 204691, 105015, 2931, 35981, 72849, 1233…
## $ men                          <dbl> 103175, 50436, 1534, 17793, 35209, 5846,…
## $ women                        <dbl> 101516, 54579, 1397, 18188, 37640, 6485,…
## $ hispanic                     <dbl> 15.9, 10.0, 22.6, 3.5, 14.1, 5.0, 1.6, 5…
## $ white                        <dbl> 78.0, 74.7, 72.4, 47.7, 9.1, 44.9, 93.4,…
## $ black                        <dbl> 1.0, 10.0, 0.8, 47.6, 0.5, 48.2, 0.4, 0.…
## $ native                       <dbl> 2.1, 0.3, 0.0, 0.1, 73.3, 0.1, 2.8, 0.4,…
## $ asian                        <dbl> 1.2, 2.5, 0.1, 0.1, 0.9, 0.2, 0.0, 0.0, …
## $ pacific                      <dbl> 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ voting_age_citizen           <dbl> 160544, 83353, 1978, 27763, 50540, 9521,…
## $ income                       <dbl> 41567, 51049, 43813, 41134, 30336, 34557…
## $ income_err                   <dbl> 796, 1256, 9676, 2932, 1354, 3147, 4852,…
## $ income_per_cap               <dbl> 23527, 25741, 23038, 19775, 14077, 21117…
## $ income_per_cap_err           <dbl> 630, 654, 1831, 1047, 634, 2590, 3351, 2…
## $ poverty                      <dbl> 18.6, 13.3, 8.0, 17.8, 37.5, 24.1, 7.3, …
## $ child_poverty                <dbl> 27.2, 21.5, 7.9, 26.6, 45.7, 46.6, 7.1, …
## $ professional                 <dbl> 24.3, 31.6, 29.5, 23.2, 29.4, 15.8, 35.2…
## $ service                      <dbl> 25.4, 19.3, 15.5, 14.3, 25.1, 23.0, 14.6…
## $ office                       <dbl> 26.8, 31.9, 17.3, 24.9, 22.0, 23.6, 15.7…
## $ construction                 <dbl> 10.9, 9.4, 32.5, 12.6, 9.7, 12.3, 21.8, …
## $ production                   <dbl> 12.7, 7.8, 5.3, 25.1, 13.8, 25.3, 12.8, …
## $ drive                        <dbl> 79.9, 83.5, 71.3, 86.1, 74.8, 81.2, 73.2…
## $ carpool                      <dbl> 12.6, 6.6, 13.6, 5.4, 9.7, 13.1, 10.4, 1…
## $ transit                      <dbl> 0.9, 0.3, 0.0, 0.4, 0.6, 0.0, 0.0, 0.0, …
## $ walk                         <dbl> 1.5, 0.3, 3.9, 1.4, 3.7, 1.6, 8.0, 4.2, …
## $ other_transp                 <dbl> 2.1, 1.7, 2.0, 0.8, 1.6, 2.3, 0.6, 0.6, …
## $ work_at_home                 <dbl> 3.1, 7.7, 9.3, 6.0, 9.5, 1.8, 7.8, 7.9, …
## $ mean_commute                 <dbl> 20.4, 25.7, 17.7, 28.2, 22.1, 25.1, 23.9…
## $ employed                     <dbl> 69774, 39278, 1275, 14304, 23207, 4548, …
## $ private_work                 <dbl> 78.6, 81.5, 69.6, 80.3, 56.9, 78.4, 71.1…
## $ public_work                  <dbl> 14.1, 11.8, 20.5, 11.9, 35.7, 15.0, 17.4…
## $ self_employed                <dbl> 7.1, 6.6, 9.8, 7.7, 7.4, 6.6, 11.0, 14.4…
## $ family_work                  <dbl> 0.2, 0.1, 0.0, 0.1, 0.0, 0.0, 0.4, 0.1, …
## $ unemployment                 <dbl> 10.1, 6.5, 8.7, 6.2, 16.1, 10.4, 2.6, 3.…
## $ winner                       <chr> "Trump", "Trump", "Trump", "Trump", "Bid…
## $ winner16                     <chr> "Trump", "Trump", "Trump", "Clinton", "C…
## $ geometry                     <POINT [°]> POINT (-113.7578 35.70472), POINT …
library("openxlsx")
# Write the first data set in a new workbook
write.xlsx(circles_data, 'electcion.xlsx')

Land doesn’t vote, people do.

ggplot() +
  geom_sf(data = data %>% filter(state_abbv != "AK", state_abbv != "HI"), fill = "#F5F5F5", colour = "grey70") +
  geom_sf(data = circles_data_sf, aes(colour = winner, size = total_pop),key_glyph = "rect") + 
  scale_colour_manual(values = c("#2E74C0","#CB454A"), na.translate = F) +
  scale_size_area(max_size = 7) + #scale proportional to population
  coord_sf(datum = NA) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"),
        legend.position = "top",
        text = element_text(family = "Courier")) + 
  labs(title = "Land doesn't vote - people do",
       subtitle = "County winners scaled to population size") +
  guides(color=guide_legend(NULL), size = FALSE)

## Swing States Data

Identify Swing States

colour_scale<- c("grey","#8A0303")
fill_scale<- c("white","#8A0303")


identify_swing<- data %>%
  # Create abbreviations in data
  mutate(swing = ifelse(state_name %in% swing_states, TRUE, FALSE),
         name_swing = ifelse(swing == TRUE, state_abbv, NA)) %>%
  
  # Group by
  group_by(state_name, swing, name_swing) %>%
  summarise(long = max(long),
            lat = max(lat)) %>%
  
  # Plot
  ggplot() +
  # Geom SF
  geom_sf(aes(fill = swing, colour=swing)) +
  # Fill Scale
  scale_fill_manual(values =  fill_scale) +
  # Colour Scale
  scale_colour_manual(values = colour_scale)+
  
  # Geom SF Text
  geom_sf_text(aes(label = name_swing), 
               colour="white", 
               family = "Courier",
               face= "bold",
               size=5) +
 
   # Remove Coordinates
  coord_sf(datum = NA) + 
  # Theme Void
  theme_void() +
  
  # Add Labels
  labs(
    title = "Which are the American Swing States?", 
    subtitle = "Where the election actually happened") +
  
  # Theme Settings
  theme(
    legend.position = "none",
    text = element_text(family= "Courier",size=15),
    plot.title = element_text(face="bold")
    
    ) +
  
  NULL


identify_swing

How many electoral votes do Swing States have?

plot_votes<- electoral_votes %>%
  filter(state %in% swing_states)%>%
  arrange(votes) %>% mutate(
    cumulative = cumsum(votes)
  ) %>%
  
  ggplot() +
  geom_col(aes(x = reorder(state, cumulative), y = votes), fill="steelblue") +
  geom_line(aes(x = reorder(state, cumulative), group=1, y = cumulative)) +
  geom_point(aes(x =reorder(state, cumulative), y=cumulative)) + 
  
  # Add arrow
  
   geom_segment(aes(x = 12, y = 194, xend = 8, yend = 193),
                  arrow = arrow(length = unit(0.5, "cm")), colour="#8A0303") +
  
  # Annotate
  
  annotate("text", x = 7.5, y = 193, label = '194', size = 5, angle = 0, fontface = "bold") +
  
  # Labels
  
  labs(title = "Swing States add up to 194 electoral votes",
       subtitle = "Cumulative increase of electoral votes in Swing States",
       x = "Swing States",
       y = "Electoral Votes") + 
  
  # Theme settings
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 20),
        text = element_text(family="Courier", size=15),
        plot.title = element_text(face="bold"))

plot_votes

Swing States - Outcome in 2016

electoral_mapping %>%
  mutate(colour = replace_na(colour, "none")) %>%
  
  
  ggplot() +
  geom_sf(aes(fill=winner_by16, colour=winner_by16))+
  scale_fill_manual(values = c("#2E74C0","#CB454A" ), na.translate=FALSE) +
  scale_colour_manual(values = c("#2E74C0","#CB454A"), na.value= "grey80") +
  guides(colour = FALSE) +
  geom_sf_text(aes(label=ifelse(!is.na(winner_by16),votes," ")), color='white', family="Courier", size=5) +
  coord_sf(datum=NA) +
  theme_void() +
  labs(title = "Trump won 188 electoral votes from Swing States in 2016",
       subtitle = "Outcome in Swing States in 2016 Elections",
       fill = "Winner") +
  
  theme(text = element_text(family="Courier", size=15),
        plot.title = element_text(face="bold")) + 
  NULL

Swing States Flipped by Biden

electoral_mapping %>% 
  
  
  ggplot() +
  geom_sf(aes(fill=was_flipped, colour=was_flipped))+
  scale_fill_manual(values = c("white", "#2E74C0")) +
  scale_colour_manual(values = c("grey", "#2E74C0")) +
  geom_sf_text(aes(label=ifelse(was_flipped==TRUE,state_abbv," ")), colour="white",size=5, family = "Courier") +
  #geom_sf_label(aes(label=ifelse(was_flipped==TRUE,electoral_votes," ")),size=3.75, family = "Courier") +
  coord_sf(datum=NA) +
  theme_void() +
  labs(title = "Joe Biden Flipped 5 States in the Elections",
       subtitle = "Flipped States from 2016 to 2020") +
  
   theme(legend.position = "none",
         text = element_text(family="Courier", size=15),
        plot.title = element_text( face="bold")) +
  
  
  NULL

Swing States Flipped by Trump

electoral_mapping %>%
  mutate(trump_flipped = "States Flipped by Trump") %>%
  
  ggplot() +
  geom_sf(aes(fill=trump_flipped, colour=trump_flipped))+
  scale_fill_manual(values = c("white")) +
  scale_colour_manual(values = c("grey")) +
  guides(fill = FALSE) +
  geom_col(aes(x=0, y=0), fill="#CB454A", show.legend = TRUE) +
  coord_sf(datum=NA) +
  theme_void() +
  
  labs(title = "Donald Trump was not persuasive enough",
       subtitle = "Mr President did not manage to flip any states from 2016 to 2020",
       colour = " ")+
  theme(text = element_text(family = "Courier", size= 15),
        plot.title = element_text(face="bold")) +
  NULL

Map - Swing States Got Biden The Presidency

electoral_mapping %>%
  mutate(#was_flipped = replace_na(was_flipped,FALSE),
        # winner_by = replace_na(winner_by, "none"),
         colour = replace_na(colour, "none")) %>%
  
  
  ggplot() +
  geom_sf(aes(fill=winner_by, colour=colour, group=was_flipped), size=.75)+
  scale_fill_manual(values = c("#2E74C0","#CB454A" ), na.translate = FALSE) +
  scale_colour_manual(values = c("#2E74C0",muted("#fffb0a"),"grey80","#CB454A")) +
  geom_sf_text(aes(label=ifelse(was_flipped==TRUE,votes," ")), color='white', family="Courier", size=5) +
  coord_sf(datum=NA) +
  theme_void() +
  #theme(legend.position = "none") + 
  guides(colour = FALSE)+
  labs(title = "The Flipped Swing States Got Biden The Presidency",
       subtitle="Flipped Swing States Highlighted Made The Difference in 2020",
    fill = "Winner"
  ) +
  
    theme(text = element_text(family="Courier",size=15),
        plot.title = element_text(face="bold")) +
  
  NULL

Bar Plot - Swing States Got Biden The Presidency

# final electoral vote battle

flipped<-c("Arizona","Wisconsin","Michigan","Pennsylvania","Georgia")

plot_votes_2<- electoral_votes %>%
  filter(state %in% flipped)%>%
  arrange(votes) %>% mutate(
    cumulative = cumsum(votes)
  ) %>%
  
  ggplot() +
  geom_col(aes(x = reorder(state, cumulative), y = votes), fill="steelblue") +
  geom_line(aes(x = reorder(state, cumulative), group=1, y = cumulative)) +
  geom_point(aes(x =reorder(state, cumulative), y=cumulative)) + 
  
  # Add arrow
  
  geom_segment(aes(x = 5, y = 20, xend = 5, yend = 73),
                  arrow = arrow(length = unit(0.5, "cm")), colour="#8A0303") +
  
  # Arrow heading down
  
   geom_segment(aes(x = 5, y = 73, xend = 5, yend = 20),
                  arrow = arrow(length = unit(0.5, "cm")), colour="#8A0303") +
  
  # Annotate
  
  annotate("text", x = 4.5, y = 50, label = '73 Votes', size = 13, angle = 0, fontface = "bold") +
  
  # Labels
 # geom_text_repel(aes(x = states, y = votes, label=votes)) +
    labs(title = "The Swing States Got Biden The Presidency",
       subtitle = "Cumulative increase of electoral votes in Swing States flipped",
       x = "Swing States Flipped",
       y = "Electoral Votes") + 
  
  # Theme settings
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 20),
        text = element_text(family="Courier", size=20),
        plot.title = element_text(face="bold"))





plot_votes_2

Demographics data

We will now analyze the demographics of the data.

Biden wins in Counties with more people.

population_data<- data %>%
  filter(total_pop<2500000) 
  
plot<- ggplot(population_data,aes(x=total_pop,y=percentage20_joe_biden))+
  geom_point(alpha=0.5,colour='blue')+
  xlab('Total population of county') +
  ylab('Percentage voted for Joe Biden')+
  labs(title="Biden wins in counties with more people!",
       subtitle ="Percentage votes to Biden by population in county") +
  theme_minimal()+
  theme(text = element_text(family="Courier"))
  #theme(plot.title = element_text(face = "bold"))
#ggtitle("Biden wins in counties with more people!")


ggplotly(plot) %>%
  layout(title = list(text = paste0('<b>','Biden wins in counties with more people!','<b>',
                                    '<br>',
                                    '<sup>',
                                    'Percentage votes to Biden by population in county',
                                    '</sup>')))

Did the rich vote for Trump?

plot_1<- ggplot(data,aes(x=income_per_cap,y=percentage20_joe_biden,color=unemployment))+
  geom_point(alpha=0.8)+
  scale_colour_gradientn(colours = terrain.colors(30))+
  xlab('Income per capita for county') +
  ylab('Percentage voted for Joe Biden')+
  theme_minimal()+
  theme(text = element_text(family="Courier"))+
  ggtitle("Did the rich vote for Trump? Not really...")

ggplotly(plot_1) %>%
  layout(title = list(text = paste0('<b>','Did the rich vote for Trump? Not really...','<b>',
                                    '<br>',
                                    '<sup>',
                                    'Percentage votes to Biden by income per capita and unemployment ',
                                    '</sup>')))

White people vote more for Trump?

g<-pivot_longer(data,c(black,white),names_to="race",values_to = "percentage")

#plot_2<- ggplot(g,aes(x=percentage,y=percentage20_donald_trump,colour=race))+
plot_2<- ggplot(g,aes(x=percentage,y=percentage20_donald_trump,colour=race))+
  theme_minimal()+
  geom_point(alpha=0.5)+
  xlab('percentage of race in the county') +
  ylab('Percentage voted for Donald Trump')+
  ggtitle("White people vote more for Trump?")+
  theme(text = element_text(family="Courier"))+
  scale_color_manual(breaks = c("black", "white"),
                        values=c("blue", "red"))

ggplotly(plot_2) %>%
  layout(title = list(text = paste0('<b>','White people vote more for Trump?','<b>',
                                    '<br>',
                                    '<sup>',
                                    'Percentage votes to Trump split by race',
                                    '</sup>'))) 
#plot_2

But who did actually vote for trump?

#z<-pivot_longer(data,c(self_employed,family_work),names_to="profession_type",values_to = "percentage_prof") 

plot_3<- 
  ggplot(data,aes(x=self_employed,y=percentage20_donald_trump))+
  theme_minimal()+
  geom_point(alpha=0.5,colour='red')+
  #facet_wrap(~ profession_type) +
  xlab('percentage of self employed people in the county') +
  ylab('Percentage voted for Donald Trump')+
  ggtitle("But who did actually vote for Trump?")+
    theme(text = element_text(family="Courier"))

ggplotly(plot_3) %>%
    layout(title = list(text = paste0('<b>','But who did actually vote for Trump?','<b>',
                                    '<br>',
                                    '<sup>',
                                    'Percentage votes to Trump by self employed percentage in county',
                                    '</sup>')))

Voter Turnout Rate

Time series plot

We will import 2 new datasets.

turnout_2016 <- read.csv("data/turnout_2016.csv") %>% 
  clean_names() 

turnout_2020 <- read.csv("data/turnout_2020.csv") %>% 
  clean_names()

#join two dataset
data_turnout <- turnout_2016 %>% 
  left_join(turnout_2020, by = c("state" = "state")) %>% 
  rename(turnout_2016 = turnout_rate.x, turnout_2020 = turnout_rate.y) %>% 
  select(state, turnout_2016, turnout_2020)

glimpse(data_turnout)
## Rows: 52
## Columns: 3
## $ state        <chr> "Minnesota", "New Hampshire", "Maine", "Iowa", "Wisconsi…
## $ turnout_2016 <dbl> 74.16, 70.31, 69.92, 68.56, 68.33, 67.86, 66.85, 65.61, …
## $ turnout_2020 <dbl> 79.9, 75.5, 76.3, 73.2, 75.8, 76.4, 75.5, 71.7, 73.0, 72…

Highest Turnout Rate Ever

library(extrafont)
extrafont::loadfonts(device="pdf")
turnout_1920_2020 <- read.csv("data/turnout_1920_2020.csv") %>% 
  clean_names() 

highlight <- turnout_1920_2020[turnout_1920_2020$year == 2020, ]
turnout_1920_2020 %>% 
ggplot(aes(x=year, y=united_states_presidential_vep_turnout_rate))+
  geom_line(size = 1, color = "#2c7fb8") +
  geom_point(color = "#2c7fb8") +
  geom_label(data = highlight, 
                   aes(label = "66.7%"), 
                   color = "#2c7fb8",
                   box.padding = 0.25,
                   point.padding = 0.5,
             vjust = -0.5,
             hjust = 0.6,
             family = "Courier")+
  theme_minimal() +
  labs(title = "The U.S. experienced highest turnout rate in over a century",
       subtitle = "1920-2020 Voting eligible population turnout rates",
       x = "Election Year",
       y = "Turnout Rate %") +
#  geom_vline(xintercept = 2020, color = "red", linetype = 5) +
#  gghighlight::gghighlight(year == 2020, label_key = united_states_presidential_vep_turnout_rate) +
  theme_minimal() +
  scale_x_continuous(n.break = 10) +
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(size = 10),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        text = element_text(family="Courier")) +
  expand_limits(y = c(40, 80))

In 2020 US election, More Americans voted than in any other in over 100 years. 66.7% percent of the voting-eligible population cast a ballot, delivering the popular vote and electoral college to Joe Biden, the Democratic candidate. Despite that the pandemic introduced a number of complications to voting day, early voting and mail-in ballots brought a record turnout for some states.

2016 vs 2020 turnout by state

swing_states <- c("Arizona","Texas","Florida","Georgia","Pennsylvania","Ohio","Wisconsin","North Carolina","South Carolina","Iowa","Nevada","Michigan")
swing_win <- c( "#2E74C0", "#CB454A", "#CB454A", "#2E74C0", "#2E74C0", "#CB454A", "#2E74C0","#CB454A", "#CB454A", "#CB454A",  "#2E74C0", "#2E74C0")
swing_colors <- data.frame(swing_states, swing_win)
swing_colors1 <- swing_colors$swing_win
names(swing_colors1) <- swing_colors$swing_states
data_turnout1 <- data_turnout %>% 
  filter(state %in% swing_states) %>% 
  pivot_longer(!state, names_to = "turnout_rate", values_to = "value")
  glimpse(data_turnout1)
## Rows: 24
## Columns: 3
## $ state        <chr> "Iowa", "Iowa", "Wisconsin", "Wisconsin", "Florida", "Fl…
## $ turnout_rate <chr> "turnout_2016", "turnout_2020", "turnout_2016", "turnout…
## $ value        <dbl> 68.56, 73.20, 68.33, 75.80, 65.61, 71.70, 64.59, 73.90, …
data_turnout1$value <- round(data_turnout1$value,1)
ggplot(data=data_turnout1,aes(x = turnout_rate, y = value, group = state)) +
  geom_line(size = 1)+
  geom_point() +
  geom_text(aes(x = turnout_rate, y = value, label = value), vjust = 2, size = 3, family = "Courier")+
  facet_wrap(~state) +
  scale_y_continuous(limits = c(40, 80)) +
  aes(color = state) +
  scale_color_manual(values = swing_colors1) +
    theme_bw()+
      theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(),
          plot.title = element_text(face = "bold", size = 15),
          legend.position = "none",
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank(),
          strip.text.x = element_text(size = 9, face = "bold"),
          text = element_text(family="Courier"))+
  scale_x_discrete(labels=c("turnout_2016" = "2016", "turnout_2020" = "2020")) +
    labs(title = "Turnout on the rise in key battlegrounds",
         subtitle = "From 2016 to 2020 presidential election")

Most of states saw an increase in voter turnout since 2016, with key battleground states like Florida, Michigan, Wisconsin and Pennsylvania seeing participation well above the national rate.

Turnout map

states_sf <- get_urbn_map("states", sf = TRUE)
data_state <- states_sf %>% 
  left_join(data_turnout, by = c("state_name" = "state"))
glimpse(data_state)
## Rows: 51
## Columns: 6
## $ state_fips   <chr> "01", "04", "08", "09", "12", "13", "16", "18", "20", "2…
## $ state_abbv   <chr> "AL", "AZ", "CO", "CT", "FL", "GA", "ID", "IN", "KS", "L…
## $ state_name   <chr> "Alabama", "Arizona", "Colorado", "Connecticut", "Florid…
## $ turnout_2016 <dbl> 58.65, 56.35, 67.86, 62.59, 65.61, 60.00, 59.68, 56.18, …
## $ turnout_2020 <dbl> 63.1, 65.9, 76.4, 71.1, 71.7, 67.7, 67.7, 61.4, 64.2, 64…
## $ geometry     <MULTIPOLYGON [m]> MULTIPOLYGON (((1150023 -15..., MULTIPOLYGO…
data_state %>% 
  ggplot(aes()) +
  geom_sf(aes(fill = turnout_2020), colour = "#ffffff")+
  theme_void() +
  labs(title = "Turnout rate in different states",
       subtitle = "2020 Election",
       fill = "turnout rate") +
  geom_sf_text(aes(label = state_abbv), color = "white", size = 2, family = "Courier") +
  theme(legend.position = "bottom",
        plot.title = element_text(face = "bold", size = 14),
        text = element_text(family="Courier")) +
  scale_fill_gradient(low = "#1a9641",
                        high = "#d7191c")

## Top states with highest turnout rates

data_turnout_15 <- data_turnout %>%
  arrange(desc(turnout_2020)) %>% 
  top_n(n=15, wt = turnout_2020)
glimpse(data_turnout_15)
## Rows: 15
## Columns: 3
## $ state        <chr> "Minnesota", "Colorado", "Maine", "Wisconsin", "Washingt…
## $ turnout_2016 <dbl> 74.16, 67.86, 69.92, 68.33, 62.46, 70.31, 66.85, 64.66, …
## $ turnout_2020 <dbl> 79.9, 76.4, 76.3, 75.8, 75.7, 75.5, 75.5, 74.2, 73.9, 73…
data_turnout_15$state
##  [1] "Minnesota"      "Colorado"       "Maine"          "Wisconsin"     
##  [5] "Washington"     "New Hampshire"  "Oregon"         "Vermont"       
##  [9] "Michigan"       "Iowa"           "Montana"        "Virginia"      
## [13] "Massachusetts"  "Florida"        "North Carolina"
top_states <- c("Minnesota","Colorado","Maine","Wisconsin","Washington","New Hampshire","Oregon","Vermont","Michigan","Iowa","Montana","Virginia","Massachusetts","Florida","North Carolina")
top_win <- c( "#2E74C0", "#2E74C0","#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#CB454A", "#CB454A", "#2E74C0", "#2E74C0", "#CB454A","#CB454A")
top_colors <- data.frame(top_states, top_win)
top_colors1 <- top_colors$top_win
names(top_colors1) <- top_colors$top_states
  ggplot(data = data_turnout_15, aes(x=reorder(state, turnout_2020), y = turnout_2020)) +
  geom_point(size = 2) +
  coord_flip() +
    theme_minimal() +
    expand_limits(y = c(70, 80)) +
      labs(title = "Biden won states with high turnout rate",
           subtitle = "Top 15 states with highest turnout rates in 2020",
         y = "turnout rate")+
    theme(legend.position = "none",
        axis.title.y = element_blank(),
        plot.title = element_text(face = "bold", size = 16),
        text = element_text(family="Courier")) +
  geom_text(data=data_turnout_15, aes(x=state, y=turnout_2020, label = turnout_2020), vjust = 0.2, hjust = 1.1, color = "white") +
    aes(color = state) +
    scale_color_manual(values = top_colors1)

Among all the states, Minnesota, Colorado, Maine and Wisconsin have the highest turnout rate, and Biden won the majority of votes in those states with high voter turnout rate.

##Correlation Create correlation between Joe Biden and other demographic characteristics

data1<-data
data_turnout_new<-data_turnout %>% 
  filter(state!="United States") %>%
  rename(X.x=turnout_2016, X.y=turnout_2020) %>% 
  mutate(difference= (X.y-X.x)/X.x *100)
covid_data<-data1 %>% 
  group_by(state_name) %>% 
  summarise(cases= sum(cases),population=sum(total_pop), lat=round(mean(lat), 0), long =round(mean(long), 0)) %>% 
  mutate(cases_per_1M=round(cases*1000000/population,0)) 
covid_data<-st_drop_geometry(covid_data)
states <-get_urbn_map("states", sf = TRUE)
turnout_and_covid <- states %>% 
  left_join(covid_data, by = "state_name") %>% 
  left_join(data_turnout_new, by= c("state_name"= "state"))
#deactivate geomtery to mutate columns

st_geometry(circles_data_sf) <- NULL

#Create corrlation matrix
correlation <-as.data.frame(cor(circles_data_sf[, 10:54], 
                                method = "pearson", 
                                use = "complete.obs")) %>%
  #Rename empty columns
  mutate(names = rownames(.)) %>% 
  #Arrange in descending order 
  arrange(desc(percentage20_joe_biden)) %>%
  #Split into categories -Ethnicity, Economy, Job divisions and the rest (0)
  mutate(number= ifelse(names %in% c("black", "asian", "pacific", "native", "hispanic","white"),1, ifelse(names %in% c("total_pop", "employed", "income_per_cap", "poverty"),2, ifelse(names %in% c("professional", "service", "office", "family_work", "production", "self_employed", "construction"), 3, 0)))) %>% 
  filter(number !=0) %>% 
  arrange(percentage20_joe_biden) %>% 
  arrange(desc(number))%>% 
  mutate(no=row_number())
#Create 3 different datafremes for next plots
correlation1<-correlation %>% 
  filter(number ==1) 
correlation2<-correlation %>% 
  filter(number ==2) 
correlation3<-correlation %>% 
  filter(number ==3) 
#Create first plot (for Ethnicity), reordered by correlation with Joe Biden
cor1<-ggplot(correlation1, aes(x=reorder(names, no),
                               y=percentage20_joe_biden, 
                               label=round(percentage20_joe_biden,2))) +
  #Add column names instead of numerical value
  geom_point(stat='identity', aes(col=percentage20_joe_biden), size=9)  +
  #Add gradient color in the bubble 
  scale_color_gradient2("",
                        low = muted("#CB454A"),
                        mid = "white", 
                        high = muted("#2E74C0"), 
                        midpoint=0, breaks=c(-0.5,0,0.45), 
                        labels=c("More Trump",0,"More Biden"))+ 
  
  geom_text(color="black", size=2) +
  #Show correlations between these values
  ylim(-0.6, 0.6) +
  #Change theme
  theme_minimal()+  
  #Remove x axis, change size of text and font
  theme(panel.grid.major.x= element_blank(), 
         panel.grid.minor.x = element_blank(),
         panel.background = element_blank(),
         axis.ticks=element_blank(),
        text=element_text(size=12, family="Courier"), 
        plot.title = element_text(color=muted("#CB454A"), face = "bold"))+
  
  labs(title="Blacks & Asians Voted J.Biden, People Working in Construction or Production Did Not", subtitle= "Correlation between Demographics and Votes for J.Biden",
       x="Ethnicity",
       y= "",
       fill="")+
  #Change sizes of text
  theme(plot.title = element_text(size=13), 
        plot.subtitle=element_text(size=12.5), 
        plot.caption = element_text(size=6))+
  #Flip the coordinates
  coord_flip()
#Create second plot for Economy
cor2<-ggplot(correlation2, aes(x=reorder(names, no),
                               y=percentage20_joe_biden, 
                               label=round(percentage20_joe_biden,2))) + 
  #Use stat=identity to show names
  geom_point(stat='identity', aes(col=percentage20_joe_biden), size=9)  +
  
  scale_color_gradient2("",low = muted("#CB454A"),
                        mid = "white", 
                        high = muted("#2E74C0"), 
                        midpoint=0, 
                        breaks=c(-0.5,0,0.45),
                        labels=c("More Trump",0,"More Biden"))+ 
  geom_text(color="black", size=2) +
  ylim(-0.6, 0.6) +
  #Flip coordinates
  coord_flip()+
  theme_minimal()+ 
  #Change theme 
  theme(panel.grid.major.x= element_blank(), 
         panel.grid.minor.x = element_blank(),
         panel.background = element_blank(),
         axis.ticks=element_blank(),
         text=element_text(size=12, family="Courier"))+
  #Add the category
  labs(y=NULL, x="Economy", fill="")
#Create third plot for Job division
cor3<-ggplot(correlation3, 
             aes(x=reorder(names, no), 
                 y=percentage20_joe_biden, 
                 label=round(percentage20_joe_biden,2))) + 
  
  geom_point(stat='identity', aes(col=percentage20_joe_biden), size=9)  +
  #Add diverging color from red for Trump to blue for Biden
  scale_color_gradient2("",low = muted("#CB454A"),
                        mid = "white",
                        high = muted("#2E74C0"), 
                        midpoint=0, breaks=c(-0.5,0,0.45), 
                        labels=c("More Trump",0,"More Biden"))+
  
  geom_text(color="black", size=2) +
  ylim(-0.6, 0.6) +
  coord_flip()+
  theme_minimal()+
  #Change theme
  theme(panel.grid.major.x= element_blank(), 
         panel.grid.minor.x = element_blank(),
         panel.background = element_blank(),
         axis.ticks=element_blank(),
         text=element_text(size=12, family="Courier"))+
  #Put x axis (but it will be flipped)
  labs(y="Correlation with % Votes Joe Biden", x="Job division", fill="")
#Combine plots
library(ggpubr)
#Arrange the plots on top of each other
ggarrange(cor1, cor2,cor3, ncol=1, nrow=3, align="hv", common.legend = TRUE, legend= "right", widths = c(15,1))

Plot correlation between covid rates and turnout rate

turnout_and_covid%>%
  ggplot(aes(x=cases_per_1M,y= X.y)) + 
  geom_point(aes(label=state_abbv))+
  #Add line
  geom_smooth(method= "lm", se=FALSE, color=muted("#CB454A"))+
  #Change theme
  theme_minimal()+
  #Add lable titles
  labs(x="Coronavirus Cases per 1M",
       y= "Turnout Rate per State", 
       title= "More Covid less Votes!", 
       subtitle="States' Turnout Rates and Level of Covid per 1 Million People")+
  #Change font and size
  theme(text=element_text(size=10, family="Courier"), 
        plot.title = element_text(color=muted("#CB454A"), face = "bold"))